
library(ggplot2)

library(readxl)
library(scales)
library(dplyr)
library(stringr)


# Make sure the output directory exists before any figure is saved.
if (!dir.exists("figures")) {
  dir.create("figures", recursive = TRUE)
}

# Two hand-curated gene lists; their union is the set of genes kept from the
# fitness table. (The `mito` prefix suggests mitochondria-related genes; the
# rationale for the two groups is not stated in this script.)
mito1 <- c(
  "DNM1L", "FIS1", "GDAP1", "MFF", "MIEF1", "MIEF2", "MTFP1", "SH3GLB1",
  "MFN1", "MFN2", "OPA1", "PHB2", "PLD6", "STOML2"
)
mito2 <- c(
  "PPARGC1A", "PPARGC1B", "ERRA", "TFAM", "NRF1", "NFE2L2", "POLRMT",
  "TFB2M", "SSBP1", "CYCS", "TOMM70A"
)
mito <- union(mito1, mito2)

# Fitness table: a `Gene` column plus one column per cell line
# (the code below assumes `Gene` is the first column).
fitness <- read.table(
  "./data/03_scaledBayesianFactors.tsv",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)
fitness <- fitness[fitness$Gene %in% mito, ]

# Normalise the column names so they can be compared with the sample sheet:
# drop dots and a leading "X" (both typically introduced by read.table when it
# sanitises headers), then rename a few cell lines by hand.
curname <- gsub("[.]|^X", "", names(fitness))
renames <- c(
  A2780ADR = "A2780",
  HT29v10 = "HT29",
  OVCAR3 = "NIHOVCAR3",
  U251 = "U251MG"
)
for (old_name in names(renames)) {
  # match() yields NA when the name is absent; a scalar assignment through an
  # NA index is a no-op, so missing cell lines are silently skipped.
  curname[match(old_name, curname)] <- renames[[old_name]]
}

samples <- read.csv(
  "./data/sample_info.csv",
  header = TRUE, sep = ",", stringsAsFactors = FALSE
)

# Keep only the columns whose (case-insensitive) name appears in the sample
# sheet, plus the first column (`Gene`).
matched <- curname[
  tolower(curname) %in% tolower(samples$stripped_cell_line_name)
]
# drop = FALSE keeps a data frame even when zero or one cell line matched.
fitness <- fitness[, c(1, match(matched, curname)), drop = FALSE]

# Transpose to one row per cell line and one column per gene.
plotdata <- data.frame(t(fitness[, -1, drop = FALSE]))
colnames(plotdata) <- fitness$Gene

# Everything after the first "_" in CCLE_name is stored as `samples$disease`.
# NOTE(review): this derived column is not read again in this script; the
# plotting label below comes from `disease_sutype` instead.
samples$disease <- vapply(
  str_split(samples$CCLE_name, "_", n = 2), "[[", character(1), 2
)

# Disease label for each retained cell line, in the same order as the rows of
# `plotdata`.
# NOTE(review): `disease_sutype` is spelled exactly as in the original code;
# presumably it matches the column header in sample_info.csv -- confirm.
plotdata$disease <- samples$disease_sutype[
  match(tolower(matched), tolower(samples$stripped_cell_line_name))
]

# Distinct disease labels, sorted so each label always receives the same
# colour for a given input.
disease_names <- sort(unique(plotdata$disease))

# Build one long colour vector by concatenating every qualitative ColorBrewer
# palette at its maximum size. The RColorBrewer objects are namespace-qualified
# because this script never attaches that package.
brewer_info <- RColorBrewer::brewer.pal.info
qual_col_pals <- brewer_info[brewer_info$category == "qual", ]
color_temp <- unlist(mapply(
  RColorBrewer::brewer.pal,
  qual_col_pals$maxcolors,
  rownames(qual_col_pals),
  SIMPLIFY = FALSE,
  USE.NAMES = FALSE
))

# One colour per disease label. rep_len() takes the first
# length(disease_names) colours and recycles the palette (instead of yielding
# NA) if there are more labels than colours; it also handles zero labels.
disease_colors <- rep_len(color_temp, length(disease_names))
names(disease_colors) <- disease_names

# Keep an untouched copy so the gene-specific renaming below can be redone
# from a clean table.
origdata <- plotdata

# Locate the gene to plot by name rather than by a fixed column position, so
# the selection does not depend on the row order of the input file.
target_gene <- "MFN2"
i <- match(target_gene, names(origdata))
if (is.na(i)) {
  stop("Gene '", target_gene, "' not found in the fitness data", call. = FALSE)
}

plotdata <- origdata
curgene <- names(plotdata)[i]
names(plotdata)[i] <- "cur_gene"

# Drop cell lines without a usable disease label.
plotdata <- plotdata[plotdata$disease != " " & !is.na(plotdata$disease), ]

### reorder disease
# Per-disease mean, standard deviation and number of cell lines.
data1 <- plotdata %>%
  group_by(disease) %>%
  summarize(
    avg = mean(cur_gene, na.rm = TRUE),
    sd = sd(cur_gene, na.rm = TRUE),
    count = n()
  )

# sd() is NA for a single observation; set it to 0 before scaling.
data1$sd[data1$count == 1] <- 0
# From here on `sd` holds the standard error of the mean (sd / sqrt(n)).
data1$sd <- data1$sd / sqrt(data1$count)

# Only diseases with more than one cell line are plotted, ordered by
# increasing mean score.
data1 <- data1[data1$count > 1, ]
data1 <- data1[order(data1$avg), ]

# Join while `disease` is still character on both sides, then convert both
# copies to a factor whose level order fixes the x-axis order of the plot.
plotdata <- inner_join(plotdata, data1, by = "disease")
data1$disease <- factor(data1$disease, levels = data1$disease)
plotdata$disease <- factor(plotdata$disease, levels = levels(data1$disease))

# Box plot of the selected gene's score per disease (x order comes from the
# factor levels of `disease`), with whisker caps and a dashed line at each
# group's mean.
p1 <-
  ggplot(data = plotdata, aes(x = disease, y = cur_gene)) +
  # Horizontal caps on the whiskers.
  stat_boxplot(geom = "errorbar", width = 0.4) +
  # Outlier points are hidden (drawn with no colour).
  geom_boxplot(aes(fill = disease), outlier.colour = NA) +
  # An error bar with ymin == ymax == mean renders as a horizontal dashed
  # segment at the group mean.
  stat_summary(
    fun = mean, geom = "errorbar",
    aes(ymax = after_stat(y), ymin = after_stat(y)),
    width = .75, linetype = "dashed"
  ) +
  # Label follows the gene actually plotted instead of a fixed gene name.
  ylab(paste0(curgene, " dependency score")) +
  scale_fill_manual(values = disease_colors, guide = "none") +
  theme(
    aspect.ratio = 0.6,
    panel.background = element_rect(
      fill = "white", colour = "black", size = .5, linetype = "solid"
    ),
    # remove the vertical grid lines
    panel.grid.major.x = element_blank(),
    # explicitly set the horizontal lines (or they will disappear too)
    panel.grid.major.y = element_line(size = .1, color = "grey"),
    legend.text = element_text(colour = "black", size = 8)
  ) +
  theme(
    axis.text.x = element_text(angle = 65, hjust = 1, size = 8),
    panel.grid = element_blank(),
    legend.background = element_blank()
  ) +
  ggtitle(curgene) +
  theme(plot.title = element_text(hjust = 0.5))

ggsave("./figures/Fig_2D.pdf", p1, width = 10, height = 6)





